'''
Created on Feb 9, 2010

@author: oabalbin
'''
from collections import defaultdict
from collections import deque

from signatures.common.classes import cmpName

class cpparser():
    """Parser for the compound-name listing of the nci60 drug screen panel."""

    def read_compound_names(self, inputfile, outfile):
        """
        Collapse a per-name compound listing into one output row per drug.

        Every input line is comma separated. The first field is only tested
        for emptiness; the second and third fields identify the drug (NSC id
        and CAS id, going by the names used for them here); everything from
        the fourth field onwards is one chemical name. A name may itself
        contain commas, so the trailing fields are glued back together
        with ','.

        One line is written per distinct (second field, third field) pair,
        in order of first appearance:

            <index>\\t<nscid>\\t<casid>\\t<name1>,<name2>,...\\n

        where <index> counts drugs from 0.

        inputfile -- iterable of text lines (e.g. an open file).
        outfile   -- object with a ``write(str)`` method.

        Lines whose first field is empty, and lines with fewer than three
        fields, are skipped. Neither stream is closed here; that is left to
        the caller.
        """
        names_by_drug = defaultdict(list)
        # Drugs in order of first appearance. Kept separately from the dict
        # so the output order does not depend on dict ordering.
        drug_order = []

        for line in inputfile:
            # Drop the line terminator only; '\r' is included so files with
            # Windows line endings get their blank lines recognised too.
            fields = line.rstrip('\r\n').split(',')
            # empty line
            if fields[0] == '':
                continue
            # Too short to carry both identifiers: skip the row rather than
            # abort the whole run with an IndexError.
            if len(fields) < 3:
                continue

            # A tuple key keeps the two identifiers apart even when one of
            # them contains the '_' that a joined string key would be split
            # on later.
            drug_key = (fields[1], fields[2])
            if drug_key not in names_by_drug:
                drug_order.append(drug_key)
            # Indexing registers the drug even if this row has no name.
            names = names_by_drug[drug_key]

            # Always join: a single-element list must become a plain string
            # too, otherwise its list repr ("['name']") ends up in the output.
            name = ",".join(fields[3:])
            if name:
                names.append(name)

        for i, drug_key in enumerate(drug_order):
            nscid, casid = drug_key
            all_names = ",".join(names_by_drug[drug_key])
            outfile.write('\t'.join([str(i), nscid, casid, all_names]) + '\n')
             

# Driver: parse the factor-4 drug-name listing and write the collapsed,
# tab-separated table next to it.
gp=cpparser()
inputfile='/home/oabalbin/projects/networks/output/2010_03_20_21_13/drugsen/drugnames/factor_4_AllDrugNames.txt'
outputfile='/home/oabalbin/projects/networks/output/2010_03_20_21_13/drugsen/drugnames/factor_4_AllDrugNames_parsed.txt'
# Context managers close both handles (and flush the output) even if parsing
# raises. Nested rather than combined so the statement also parses on
# interpreters that lack the multi-manager ``with`` form.
with open(inputfile) as infh:
    with open(outputfile,'w') as outfh:
        gp.read_compound_names(infh, outfh)

